####################################################
####################################################
# 
#               TIME 1, AUGUST 2018
#
####################################################
####################################################
library(car)
library(psych)

path.to.first.data.set="<Your path here>/august 2018.csv"
dat1 <- read.csv(path.to.first.data.set)
dat1=dat1[-c(1:2),]

####################### DEMOGRAPHICS
nrow(dat1)

ages=as.numeric(dat1$age)
mean(ages,na.rm=T)
sd(ages,na.rm=T)

table(dat1$sex)
# Female, 72, 0.4363636
# Male, 92, 0.5575758
# Other, 1, 0.006060606


table(dat1$ethnicity)
# White	127	0.774390244
# Black	14	0.085365854
# Asian American	13	0.079268293
# Hispanic	8	0.048780488
# Native American or Alaskan Native	1	0.006097561
# Prefer not to say	1	0.006097561

table(dat1$education)
# No college	13	0.079268293
# Some college	51	0.31097561
# College Degree	70	0.426829268
# Some post-graduate work	5	0.030487805
# Post-graduate degree	25	0.152439024
# 



####################### CODING DATA / PSYCHOMETRIC CHECKS
####################### CHICKEN POX QUESTIONS
dat1$GVax1=as.numeric(car::recode(dat1$POX.intro, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$GVax2=as.numeric(car::recode(dat1$POX2, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$GVax3=as.numeric(car::recode(dat1$POX4, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$GVax3.1=as.numeric(car::recode(dat1$POX5, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$GVax4=as.numeric(car::recode(dat1$POX7, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$GVax5=as.numeric(car::recode(dat1$POX9, "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat1$GVax6=as.numeric(car::recode(dat1$POX10, "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat1$GVax7=as.numeric(car::recode(dat1$POX11, "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat1$GVax8=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax9=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax10=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax11=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax12=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax13=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax14=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))
dat1$GVax15=as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

gen.vax=cbind(dat1$GVax1,dat1$GVax2,dat1$GVax3,dat1$GVax4,dat1$GVax5,dat1$GVax6,dat1$GVax7,dat1$GVax8,
              dat1$GVax9,dat1$GVax10,dat1$GVax3.1,dat1$GVax11,dat1$GVax12,dat1$GVax13,dat1$GVax14,dat1$GVax15)
gen.vax=rowMeans(gen.vax)

####################### CHICKEN POX QUESTIONS
dat1$pox.fam=as.numeric(car::recode(dat1$POX.intro, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$pox2=as.numeric(car::recode(dat1$POX2, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$pox3=as.numeric(car::recode(dat1$POX4, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$pox4=as.numeric(car::recode(dat1$POX5, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$pox5=8-as.numeric(car::recode(dat1$POX7, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$pox6=8-as.numeric(car::recode(dat1$POX9, "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat1$pox7=8-as.numeric(car::recode(dat1$POX10, "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat1$pox8=8-as.numeric(car::recode(dat1$POX11, "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat1$pox9=8-as.numeric(car::recode(dat1$POX13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

POX=cbind(dat1$pox.fam,dat1$pox2,dat1$pox3,dat1$pox4,dat1$pox5,dat1$pox6,dat1$pox7,dat1$pox8,dat1$pox9)
POX.PAR=fa.parallel(POX, fm = 'minres', fa = 'fa')
POX.Factor <- fa(POX,nfactors = 1,rotate = "oblimin",fm="minres")
print(POX.Factor$loadings)

POX=cbind(dat1$pox2,dat1$pox3,dat1$pox4,dat1$pox5,dat1$pox6,dat1$pox7,dat1$pox8,dat1$pox9)
POX.PAR=fa.parallel(POX, fm = 'minres', fa = 'fa')
POX.Factor <- fa(POX,nfactors = 1,rotate = "oblimin",fm="minres")
print(POX.Factor$loadings)


pox1=rowMeans(POX)
psych::alpha(POX)[1]



####################### MMR QUESTIONS
dat1$mmr.fam=as.numeric(car::recode(dat1$MMR.intro, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$mmr2=as.numeric(car::recode(dat1$MMR2, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$mmr3=as.numeric(car::recode(dat1$MMR4, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$mmr4=as.numeric(car::recode(dat1$MMR5, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$mmr5=8-as.numeric(car::recode(dat1$MMR7, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$mmr6=8-as.numeric(car::recode(dat1$MMR9, "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat1$mmr7=8-as.numeric(car::recode(dat1$MMR10, "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat1$mmr8=8-as.numeric(car::recode(dat1$MMR11, "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat1$mmr9=8-as.numeric(car::recode(dat1$MMR13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

MMR=cbind(dat1$mmr.fam,dat1$mmr2,dat1$mmr3,dat1$mmr4,dat1$mmr5,dat1$mmr6,dat1$mmr7,dat1$mmr8,dat1$mmr9)
MMR.PAR=fa.parallel(MMR, fm = 'minres', fa = 'fa')
MMR.Factor <- fa(MMR,nfactors = 1,rotate = "oblimin",fm="minres")
print(MMR.Factor$loadings)

MMR=cbind(dat1$mmr2,dat1$mmr3,dat1$mmr4,dat1$mmr5,dat1$mmr6,dat1$mmr7,dat1$mmr8,dat1$mmr9)
MMR.PAR=fa.parallel(MMR, fm = 'minres', fa = 'fa')
MMR.Factor <- fa(MMR,nfactors = 1,rotate = "oblimin",fm="minres")
print(MMR.Factor$loadings)

mmr1=rowMeans(MMR)
psych::alpha(MMR)[1]


####################### HPV QUESTIONS
dat1$hpv.fam=as.numeric(car::recode(dat1$HPV.intro, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$hpv2=as.numeric(car::recode(dat1$HPV2, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$hpv3=as.numeric(car::recode(dat1$HPV4, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$hpv4=as.numeric(car::recode(dat1$HPV5, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$hpv5=8-as.numeric(car::recode(dat1$HPV7, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$hpv6=8-as.numeric(car::recode(dat1$HPV9, "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat1$hpv7=8-as.numeric(car::recode(dat1$HPV10, "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat1$hpv8=8-as.numeric(car::recode(dat1$HPV11, "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat1$hpv9=8-as.numeric(car::recode(dat1$HPV13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

HPV=cbind(dat1$hpv.fam,dat1$hpv2,dat1$hpv3,dat1$hpv4,dat1$hpv5,dat1$hpv6,dat1$hpv7,dat1$hpv8,dat1$hpv9)
HPV.PAR=fa.parallel(HPV, fm = 'minres', fa = 'fa')
HPV.Factor <- fa(HPV,nfactors = 1,rotate = "oblimin",fm="minres")
print(HPV.Factor$loadings)

HPV=cbind(dat1$hpv2,dat1$hpv3,dat1$hpv4,dat1$hpv5,dat1$hpv6,dat1$hpv7,dat1$hpv8,dat1$hpv9)
HPV.PAR=fa.parallel(HPV, fm = 'minres', fa = 'fa')
HPV.Factor <- fa(HPV,nfactors = 1,rotate = "oblimin",fm="minres")
print(HPV.Factor$loadings)

hpv1=rowMeans(HPV)
psych::alpha(HPV)[1]




####################### FLU QUESTIONS
dat1$flu.fam=as.numeric(car::recode(dat1$FluFamiliar, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$flu2=as.numeric(car::recode(dat1$Flu2, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$flu3=as.numeric(car::recode(dat1$Flu4, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$flu4=as.numeric(car::recode(dat1$Flu5, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$flu5=8-as.numeric(car::recode(dat1$Flu7, "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat1$flu6=8-as.numeric(car::recode(dat1$Flu9, "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat1$flu7=8-as.numeric(car::recode(dat1$Flu10, "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat1$flu8=8-as.numeric(car::recode(dat1$Flu11, "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat1$flu9=8-as.numeric(car::recode(dat1$Flu13, "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

FLU=cbind(dat1$flu.fam,dat1$flu2,dat1$flu3,dat1$flu4,dat1$flu5,dat1$flu6,dat1$flu7,dat1$flu8,dat1$flu9)
FLU.PAR=fa.parallel(FLU, fm = 'minres', fa = 'fa')
FLU.Factor <- fa(FLU,nfactors = 1,rotate = "oblimin",fm="minres")
print(FLU.Factor$loadings)

FLU=cbind(dat1$flu2,dat1$flu3,dat1$flu4,dat1$flu5,dat1$flu6,dat1$flu7,dat1$flu8,dat1$flu9)
FLU.PAR=fa.parallel(FLU, fm = 'minres', fa = 'fa')
FLU.Factor <- fa(FLU,nfactors = 1,rotate = "oblimin",fm="minres")
print(FLU.Factor$loadings)

flu1=rowMeans(FLU)
psych::alpha(FLU)[1]




####################### Cluster analysis
# https://uc-r.github.io/kmeans_clustering
library(tidyverse)  # data manipulation
library(cluster)    # clustering algorithms
library(factoextra) # clustering algorithms & visualization

colnames(dat1)

AGE=dat1$age
male=ifelse(dat1$sex=="Male",1,0)
white=ifelse(dat1$ethnicity=="White or European-American",1,0)
swing.state=ifelse(dat1$state.vote=="S",1,0)
pop=dat1$pop
red.state=ifelse(dat1$state.vote=="R",1,0)

education=ifelse(dat1$education=="Some high school",1,0)
education=ifelse(dat1$education=="Some college",2,education)
education=ifelse(dat1$education=="College degree",3,education)
education=ifelse(dat1$education=="Some post-graduate work",4,education)
education=ifelse(dat1$education=="Post-graduate degree",5,education)

TRUMP=dat1$trump

# https://uc-r.github.io/kmeans_clustering
library(tidyverse)  # data manipulation
library(cluster)    # clustering algorithms
library(factoextra) # clustering algorithms & visualization




df = data.frame(cbind(AGE,male,white,scale(education),red.state,swing.state,scale(pop),TRUMP,
                      scale(gen.vax),scale(flu1),scale(hpv1),scale(mmr1),scale(pox1)
                      ))
colnames(df)[c(4,7:13)]=c("education","pop","Trump","gen.vax","flu","hpv","mmr","pox")

colnames(df)[9:13]
df <- na.omit(df)
df.minus=df[,-c(9:13)]
distance <- get_dist(df.minus)
fviz_dist(distance, gradient = list(low = "#00AFBB", mid = "white", high = "#FC4E07"))
k2 <- kmeans(df.minus, centers = 2, nstart = 25)
str(k2)


k2$centers

table(k2$cluster)

compare.cluster=matrix(c(113,39,148,44),ncol=2,byrow=TRUE)
colnames(compare.cluster)=c("lib","con")
rownames(compare.cluster)=c("S1","S2")
TesT=chisq.test(compare.cluster)
TesT

t.test(as.numeric(df$gen.vax),as.numeric(k2$cluster))

# flu
t.test(as.numeric(df$flu),as.numeric(k2$cluster))
cluster1.flu=as.numeric(subset(df,k2$cluster==1)$flu)
cluster2.flu=as.numeric(subset(df,k2$cluster==2)$flu)
t.test(cluster1.flu,cluster2.flu)

# hpv
t.test(as.numeric(df$hpv),as.numeric(k2$cluster))
cluster1.hpv=as.numeric(subset(df,k2$cluster==1)$hpv)
cluster2.hpv=as.numeric(subset(df,k2$cluster==2)$hpv)
t.test(cluster1.hpv,cluster2.hpv)


t.test(as.numeric(df$mmr),as.numeric(k2$cluster))
t.test(as.numeric(df$pox),as.numeric(k2$cluster))



# Put people's clusters (if they have one) into dat1
nrow(dat1)

# Recreate "df" before NAs are removed
df = data.frame(cbind(AGE,male,white,scale(education),red.state,swing.state,scale(pop),TRUMP,
                      scale(gen.vax),scale(flu1),scale(hpv1),scale(mmr1),scale(pox1)
))
colnames(df)[c(4,7:13)]=c("education","pop","Trump","gen.vax","flu","hpv","mmr","pox")


cluster.assignments=rep(NA,length(flu1))
cluster.index=1
# Iterate through rows of df
for (i in 1:nrow(df)) {
  if(anyNA(df[i,])){
    cluster.assignments[i]=NA
  }
  else{
    cluster.assignments[i]=k2$cluster[cluster.index]
    cluster.index=cluster.index+1
  }
  
}





# How many clusters?
k1 <- kmeans(df.minus, centers = 1, nstart = 25)
k2 <- kmeans(df.minus, centers = 2, nstart = 25)
k3 <- kmeans(df.minus, centers = 3, nstart = 25)
k4 <- kmeans(df.minus, centers = 4, nstart = 25)
k5 <- kmeans(df.minus, centers = 5, nstart = 25)

plot(1:5, c(k1$tot.withinss, k2$tot.withinss, k3$tot.withinss, k4$tot.withinss,
            k5$tot.withinss), type = "b", pch = 19, xlab = "Number of Clusters",
      ylab= "Total Within-Cluster Sum of Squares", main = "", xaxt = "n",
      cex.axis =.75)
axis(1, 1:5, 1:5, cex.axis = .75)



####################################################
####################################################
# 
#               TIME 2, April, 5 2023
#
####################################################
####################################################
file.path.to.second.data.set="<Your path here>/Spring 2023 Vaccine Opinions (Responses) - Form Responses 1.csv"
dat2 <- read.csv(file.path.to.second.data.set, dec=",")

# Remove the 2 test runs at the beginning
dat2=dat2[-c(1:2),]

# Demographics, plus statistical test comparing pre- and post-pandemic samples
ages2=dat2$How.old.are.you.as.of.today.s.date..numerals.only..for.example.21.or.56..
mean(ages2,na.rm=T)
sd(ages2,na.rm=T)

# Difference in age between samples?
t.test(ages,ages2)
t.test(ages,ages2,var.equal=0)

table(dat2$What.is.your.sex.)
# Female, 135, 0.5555556
# Male, 106, 0.436214
# Other, 2, 0.008230453

# Sex differences between studies?
sex.chi=matrix(c(92,72,106,135),byrow=1,nrow=2)
rownames(sex.chi)=c("S1","S2")
colnames(sex.chi)=c("male","female")
chisq.test(sex.chi)


table(dat2$What.is.your.ethnicity)
# White,	173,	0.711934156
# Asian,	21,	0.086419753
# Black,	18,	0.074074074
# Hispanic,	14,	0.057613169
# Multiple or Prefer not to say,	17,	0.069958848

# White / non-White differences between studies?
race.chi=matrix(c(127,165-127,173,243-173),byrow=1,nrow=2)
rownames(race.chi)=c("S1","S2")
colnames(race.chi)=c("White","non-White")
chisq.test(race.chi)




table(dat2$What.is.the.highest.level.of.education.you.have.completed.)
# High school (or less): 66, 0.2716049
# College degree: 124, 0.5102881
# Some post-graduate work: 11, 0.04526749
# Post-graduate degree: 41, 0.1687243
# Prefer not to say: 1, 0.004115226


# From study 1
# No college	13	0.079268293
# Some college	51	0.31097561
# College Degree	70	0.426829268
# Some post-graduate work	5	0.030487805
# Post-graduate degree	25	0.152439024

#     No college degree   College degree  Post-graduate work or degree
# S1    64                    70                  30
# S2    66                    124                 52

edu.chi=matrix(c(64,70,30,66,124,52),byrow=T,nrow=2)
rownames(edu.chi)=c("S1","S2")
colnames(edu.chi)=c("No college","college degree","Post grad+")

TesT=chisq.test(edu.chi)
TesT$stdres




####################### CODING DATA AND PSYCHOMETRIC CHECKS

####################### CHICKEN POX QUESTIONS
dat2$pox.fam=as.numeric(car::recode(dat2[,8], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$pox2=as.numeric(car::recode(dat2[,9], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$pox3=as.numeric(car::recode(dat2[,10], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$pox4=as.numeric(car::recode(dat2[,11], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$pox5=8-as.numeric(car::recode(dat2[,12], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$pox6=8-as.numeric(car::recode(dat2[,13], "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat2$pox7=8-as.numeric(car::recode(dat2[,14], "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat2$pox8=8-as.numeric(car::recode(dat2[,15], "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat2$pox9=8-as.numeric(car::recode(dat2[,16], "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

POX=cbind(dat2$pox.fam,dat2$pox2,dat2$pox3,dat2$pox4,dat2$pox5,dat2$pox6,dat2$pox7,dat2$pox8,dat2$pox9)
POX.PAR=fa.parallel(POX, fm = 'minres', fa = 'fa')
POX.Factor <- fa(POX,nfactors = 1,rotate = "oblimin",fm="minres")
print(POX.Factor$loadings)

POX=cbind(dat2$pox2,dat2$pox3,dat2$pox4,dat2$pox5,dat2$pox6,dat2$pox7,dat2$pox8,dat2$pox9)
POX.PAR=fa.parallel(POX, fm = 'minres', fa = 'fa')
POX.Factor <- fa(POX,nfactors = 1,rotate = "oblimin",fm="minres")
print(POX.Factor$loadings)

pox2=rowMeans(POX)
psych::alpha(POX)[1]



####################### MMR QUESTIONS
dat2$mmr.fam=as.numeric(car::recode(dat2[,26], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$mmr2=as.numeric(car::recode(dat2[,27], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$mmr3=as.numeric(car::recode(dat2[,28], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$mmr4=as.numeric(car::recode(dat2[,29], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$mmr5=8-as.numeric(car::recode(dat2[,30], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$mmr6=8-as.numeric(car::recode(dat2[,31], "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat2$mmr7=8-as.numeric(car::recode(dat2[,32], "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat2$mmr8=8-as.numeric(car::recode(dat2[,33], "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat2$mmr9=8-as.numeric(car::recode(dat2[,34], "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

# Need to show that familiarity doesn't belong
MMR=cbind(dat2$mmr.fam,dat2$mmr2,dat2$mmr3,dat2$mmr4,dat2$mmr5,dat2$mmr6,dat2$mmr7,dat2$mmr8,dat2$mmr9)
MMR.PAR=fa.parallel(MMR, fm = 'minres', fa = 'fa')
MMR.Factor <- fa(MMR,nfactors = 1,rotate = "oblimin",fm="minres")
print(MMR.Factor$loadings)

MMR=cbind(dat2$mmr2,dat2$mmr3,dat2$mmr4,dat2$mmr5,dat2$mmr6,dat2$mmr7,dat2$mmr8,dat2$mmr9)
MMR.PAR=fa.parallel(MMR, fm = 'minres', fa = 'fa')
MMR.Factor <- fa(MMR,nfactors = 1,rotate = "oblimin",fm="minres")
print(MMR.Factor$loadings)

mmr2=rowMeans(MMR)
psych::alpha(MMR)[1]




####################### HPV QUESTIONS
dat2$hpv.fam=as.numeric(car::recode(dat2[,35], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$hpv2=as.numeric(car::recode(dat2[,36], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$hpv3=as.numeric(car::recode(dat2[,37], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$hpv4=as.numeric(car::recode(dat2[,38], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$hpv5=8-as.numeric(car::recode(dat2[,39], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$hpv6=8-as.numeric(car::recode(dat2[,40], "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat2$hpv7=8-as.numeric(car::recode(dat2[,41], "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat2$hpv8=8-as.numeric(car::recode(dat2[,42], "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat2$hpv9=8-as.numeric(car::recode(dat2[,43], "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

HPV=cbind(dat2$hpv.fam,dat2$hpv2,dat2$hpv3,dat2$hpv4,dat2$hpv5,dat2$hpv6,dat2$hpv7,dat2$hpv8,dat2$hpv9)
HPV.PAR=fa.parallel(HPV, fm = 'minres', fa = 'fa')
HPV.Factor <- fa(HPV,nfactors = 1,rotate = "oblimin",fm="minres")
print(HPV.Factor$loadings)

HPV=cbind(dat2$hpv2,dat2$hpv3,dat2$hpv4,dat2$hpv5,dat2$hpv6,dat2$hpv7,dat2$hpv8,dat2$hpv9)
HPV.PAR=fa.parallel(HPV, fm = 'minres', fa = 'fa')
HPV.Factor <- fa(HPV,nfactors = 1,rotate = "oblimin",fm="minres")
print(HPV.Factor$loadings)

hpv2=rowMeans(HPV)
psych::alpha(HPV)[1]



####################### FLU QUESTIONS
dat2$flu.fam=as.numeric(car::recode(dat2[,44], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$flu2=as.numeric(car::recode(dat2[,45], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$flu3=as.numeric(car::recode(dat2[,46], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$flu4=as.numeric(car::recode(dat2[,47], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$flu5=8-as.numeric(car::recode(dat2[,48], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$flu6=8-as.numeric(car::recode(dat2[,49], "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat2$flu7=8-as.numeric(car::recode(dat2[,50], "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat2$flu8=8-as.numeric(car::recode(dat2[,51], "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat2$flu9=8-as.numeric(car::recode(dat2[,52], "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

FLU=cbind(dat2$flu.fam,dat2$flu2,dat2$flu3,dat2$flu4,dat2$flu5,dat2$flu6,dat2$flu7,dat2$flu8,dat2$flu9)
FLU.PAR=fa.parallel(FLU, fm = 'minres', fa = 'fa')
FLU.Factor <- fa(FLU,nfactors = 1,rotate = "oblimin",fm="minres")
print(FLU.Factor$loadings)

FLU=cbind(dat2$flu2,dat2$flu3,dat2$flu4,dat2$flu5,dat2$flu6,dat2$flu7,dat2$flu8,dat2$flu9)
FLU.PAR=fa.parallel(FLU, fm = 'minres', fa = 'fa')
FLU.Factor <- fa(FLU,nfactors = 1,rotate = "oblimin",fm="minres")
print(FLU.Factor$loadings)

flu2=rowMeans(FLU)
psych::alpha(FLU)[1]



####################### COVID QUESTIONS
dat2$covid.fam=as.numeric(car::recode(dat2[,17], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$covid2=as.numeric(car::recode(dat2[,18], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$covid3=as.numeric(car::recode(dat2[,19], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$covid4=as.numeric(car::recode(dat2[,20], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$covid5=8-as.numeric(car::recode(dat2[,21], "'Strongly disagree'=1; 'Disagree'=2; 'Somewhat disagree'=3; 'Neither agree nor disagree'=4; 'Somewhat agree'=5; 'Agree'=6; 'Strongly agree'=7"))
dat2$covid6=8-as.numeric(car::recode(dat2[,22], "'Very bad'=1; 'Bad'=2; 'Somewhat bad'=3; 'Neither good nor bad'=4; 'Somewhat good'=5; 'Good'=6; 'Very good'=7"))
dat2$covid7=8-as.numeric(car::recode(dat2[,23], "'Very harmful'=1; 'Harmful'=2; 'Somewhat harmful'=3; 'Neither beneficial nor harmful'=4; 'Somewhat beneficial'=5; 'Beneficial'=6; 'Very beneficial'=7"))
dat2$covid8=8-as.numeric(car::recode(dat2[,24], "'Very foolish'=1; 'Foolish'=2; 'Somewhat foolish'=3; 'Neither wise nor foolish'=4; 'Somewhat wise'=5; 'Wise'=6; 'Very wise'=7"))
dat2$covid9=8-as.numeric(car::recode(dat2[,25], "'Very unlikely'=1; 'Unlikely'=2; 'Somewhat unlikely'=3; 'Neither likely nor unlikely'=4; 'Somewhat likely'=5; 'Likely'=6; 'Very likely'=7"))

COVID=cbind(dat2$covid.fam,dat2$covid2,dat2$covid3,dat2$covid4,dat2$covid5,dat2$covid6,dat2$covid7,dat2$covid8,dat2$covid9)
COVID.PAR=fa.parallel(COVID, fm = 'minres', fa = 'fa')
COVID.Factor <- fa(COVID,nfactors = 1,rotate = "oblimin",fm="minres")
print(COVID.Factor$loadings)

COVID=cbind(dat2$covid2,dat2$covid3,dat2$covid4,dat2$covid5,dat2$covid6,dat2$covid7,dat2$covid8,dat2$covid9)
COVID.PAR=fa.parallel(COVID, fm = 'minres', fa = 'fa')
COVID.Factor <- fa(COVID,nfactors = 1,rotate = "oblimin",fm="minres")
print(COVID.Factor$loadings)

psych::alpha(COVID)[1]


COVID=rowMeans(COVID)





####################################################
####################################################
# 
#               Pre- vs. Post-pandemic  Analysis
#
####################################################
####################################################

# Formatting demographic stuff
dat1$male=ifelse(dat1$sex=="Male",1,0)
dat2$male=ifelse(dat2$What.is.your.sex.=="Male",1,0)

dat1$white=ifelse(dat1$ethnicity=="White or European-American",1,0)
dat2$white=ifelse(dat2$What.is.your.ethnicity=="White or European-American",1,0)

dat1$col.degree=ifelse(dat1$education=="College degree",1,0)
dat1$grad=ifelse(dat1$education=="Some post-graduate work" | dat1$education=="Post-graduate degree",1,0)

dat2$col.degree=ifelse(dat2$What.is.the.highest.level.of.education.you.have.completed.=="College degree",1,0)
dat2$grad=ifelse(dat2$What.is.the.highest.level.of.education.you.have.completed.=="Some post-graduate work" | 
                   dat2$What.is.the.highest.level.of.education.you.have.completed.=="Post-graduate degree",1,0)

# Building the data frame
combined.dat=data.frame(c(1:nrow(dat1)),cbind(rep(0,nrow(dat1)),rep("pox",nrow(dat1)),pox1),
                        ages,dat1$male,dat1$white,dat1$col.degree,dat1$grad)
colnames(combined.dat)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")

add.mmr=data.frame(cbind(c(1:nrow(dat1)),rep(0,nrow(dat1)),rep("mmr",nrow(dat1)),mmr1),
                   ages,dat1$male,dat1$white,dat1$col.degree,dat1$grad)
colnames(add.mmr)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.mmr)

add.hpv=data.frame(cbind(c(1:nrow(dat1)),rep(0,nrow(dat1)),rep("hpv",nrow(dat1)),hpv1),
                   ages,dat1$male,dat1$white,dat1$col.degree,dat1$grad)
colnames(add.hpv)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.hpv)

add.flu=data.frame(cbind(c(1:nrow(dat1)),rep(0,nrow(dat1)),rep("flu",nrow(dat1)),flu1),
                   ages,dat1$male,dat1$white,dat1$col.degree,dat1$grad)
colnames(add.flu)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.flu)

add.pox2=data.frame(cbind(c(1:nrow(dat2)),rep(1,nrow(dat2)),rep("pox",nrow(dat2)),pox2),
                    ages2,dat2$male,dat2$white,dat2$col.degree,dat2$grad)
colnames(add.pox2)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.pox2)

add.mmr2=data.frame(cbind(c(1:nrow(dat2)),rep(1,nrow(dat2)),rep("mmr",nrow(dat2)),mmr2),
                    ages2,dat2$male,dat2$white,dat2$col.degree,dat2$grad)
colnames(add.mmr2)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.mmr2)

add.hpv2=data.frame(cbind(c(1:nrow(dat2)),rep(1,nrow(dat2)),rep("hpv",nrow(dat2)),hpv2),
                    ages2,dat2$male,dat2$white,dat2$col.degree,dat2$grad)
colnames(add.hpv2)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.hpv2)

add.flu2=data.frame(cbind(c(1:nrow(dat2)),rep(1,nrow(dat2)),rep("flu",nrow(dat2)),flu2),
                    ages2,dat2$male,dat2$white,dat2$col.degree,dat2$grad)
colnames(add.flu2)=c("subject","time","vaccine","attitude","age","male","white","colllege.d","grad")
combined.dat=rbind(combined.dat,add.flu2)


combined.dat$attitude=as.numeric(combined.dat$attitude)


# Looking at group means
aggregate(attitude~time+vaccine,combined.dat,mean)
aggregate(attitude~time,combined.dat,mean)
aggregate(attitude~vaccine,combined.dat,mean)


#  Multilevel models
library(lme4)
library(lmerTest)

mod=lmer(attitude~vaccine*time + (1|subject),data=combined.dat)
summary(mod)
0.8441/(0.8441+0.9821)


## How strongly can we interpret these interactions?
# BF01 = exp( [BIC(alt)-BIC(null)] / 2 )
null.mod=lmer(attitude~vaccine+time + (1|subject),data=combined.dat)

bf01=exp((BIC(mod)-BIC(null.mod))/2)   # >100
bf01 # 467358.2

library(MuMIn)
r.squaredGLMM(mod)

# Adding demographic variables to the model
demo.mod=lmer(attitude~vaccine*time + age + male + white + colllege.d + grad + (1|subject),data=combined.dat)
summary(demo.mod)

r.squaredGLMM(demo.mod)


######## Is there a pre/post x gender interaction for any of the vaccines?
# flu
flu.only=subset(combined.dat,combined.dat$vaccine=="flu")
flu.mod=aov(attitude~time*male,data=flu.only)
summary(flu.mod)
# hpv
hpv.only=subset(combined.dat,combined.dat$vaccine=="hpv")
hpv.mod=aov(attitude~time*male,data=hpv.only)
summary(hpv.mod)
# mmr
mmr.only=subset(combined.dat,combined.dat$vaccine=="mmr")
mmr.mod=aov(attitude~time*male,data=mmr.only)
summary(mmr.mod)
# pox
pox.only=subset(combined.dat,combined.dat$vaccine=="pox")
pox.mod=aov(attitude~time*male,data=pox.only)
summary(pox.mod)


####################################################
####################################################
# 
#              Pre/post pandemic  Bar Plot
#
####################################################
####################################################
library(gplots)

Mns=aggregate(attitude~time+vaccine,combined.dat,mean)[,3]
SDs=aggregate(attitude~time+vaccine,combined.dat,sd)[,3]
Ns=aggregate(attitude~time+vaccine,combined.dat,length)[,3]
SEs=c()
for (i in 1:length(Mns)){
  SEs=c(SEs,SDs[i]/sqrt(Ns[i]))
}
Mns=c(Mns,0,mean(COVID,na.rm=T))
SDs=c(SDs,0,sd(COVID,na.rm=T))
Ns=c(Ns,0,length(COVID))
SEs=c(SEs,0,SDs[10]/sqrt(Ns[10]))


png("change Figure 1.png", width = 600, height = 600)
barplot2(Mns,ylim=c(1,7), xpd=FALSE,
         ylab="Negative vaccine attitudes",
         plot.ci=T,ci.u=Mns+SEs,ci.l=Mns-SEs,
         col=rep(c("dark gray","light gray"),5))
dev.off()



##################################################################################
##################################################################################
# 
#               Post-pandemic: Risk perceptions interacting with politics?
#
##################################################################################
##################################################################################

# Recode the data, build new data frame
dat2$pols=as.numeric(
  car::recode(dat2$Which.of.the.following.best.describes.your.political.orientation.,
              "'Very liberal'=1; 'Liberal'=2; 'Somewhat liberal'=3; 'Neither liberal nor conservative'=4; 'Somewhat conservative'=5; 'Conservative'=6; 'Very conservative'=7"))


time2=data.frame(subject=integer(),vaccine=integer(),attitude=integer(),pols=integer(),
                 age=integer(),male=integer(),white=integer(),colllege.d=integer(),grad=integer())
for (i in 1:nrow(dat2)) {
  new_flu=c(i,"flu",flu2[i],dat2$pols[i],
            ages2[i],dat2$male[i],dat2$white[i],dat2$col.degree[i],dat2$grad[i])
  new_hpv=c(i,"hpv",hpv2[i],dat2$pols[i],
            ages2[i],dat2$male[i],dat2$white[i],dat2$col.degree[i],dat2$grad[i])
  new_mmr=c(i,"mmr",mmr2[i],dat2$pols[i],
            ages2[i],dat2$male[i],dat2$white[i],dat2$col.degree[i],dat2$grad[i])
  new_pox=c(i,"pox",pox2[i],dat2$pols[i],
            ages2[i],dat2$male[i],dat2$white[i],dat2$col.degree[i],dat2$grad[i])
  new_covid=c(i,"covid",COVID[i],dat2$pols[i],
              ages2[i],dat2$male[i],dat2$white[i],dat2$col.degree[i],dat2$grad[i])
  
  time2=rbind(time2,new_flu,new_hpv,new_mmr,new_pox,new_covid)
}
colnames(time2)=c("subject","vaccine","attitude","pols","age","male","white","col.degree","grad")
time2$attitude=as.numeric(time2$attitude)
time2$subject=as.numeric(time2$subject)
time2$pols=as.numeric(time2$pols)
time2$age=as.numeric(time2$age)
time2=subset(time2,time2$attitude != "NA")



# Model starts here
mod=lmer(time2$attitude~time2$vaccine*time2$pols + (1|time2$subject),data=time2)
summary(mod)

coef(mod) # Those subject-level intercepts have a good bit of variability.
          # This makes the weirdly low intercept in the model make more sense.
r.squaredGLMM(mod)


# Adding demographics to the model
com.time2=time2[complete.cases(time2),]
demo.mod=lmer(com.time2$attitude~com.time2$vaccine*com.time2$pols + com.time2$age + com.time2$male + com.time2$white + com.time2$col.degree + com.time2$grad + (1|com.time2$subject),data=com.time2)
summary(demo.mod)
r.squaredGLMM(demo.mod)



# Redone models
polorized.dat=subset(time2,time2$pols !=4)
polorized.dat$polar.conservative=ifelse(polorized.dat$pols>=5,1,0)

# How many conservatives and liberals in the data?
count.chocula=subset(polorized.dat,polorized.dat$vaccine=="flu")
table(count.chocula$polar.conservative)

cons.only=subset(count.chocula,count.chocula$polar.conservative==1)
libs.only=subset(count.chocula,count.chocula$polar.conservative==0)

nrow(cons.only)
sum(as.numeric(cons.only$col.degree)) # 27 / 44 have college degree
sum(as.numeric(cons.only$grad)) # 5 out of 44 have graduate degree
44 - (27+5) # 12 / 44 have neither

nrow(libs.only)
sum(as.numeric(libs.only$col.degree)) # 77 / 148 have college degree
sum(as.numeric(libs.only$grad)) # 37 / 148 have grad degree
148 - (37+77) # 34 out of 148 have neither

lib.cons.education=matrix(c(12,27,5,34,77,37),ncol=3,byrow=TRUE)
colnames(lib.cons.education)=c("No degree","College degree","Grad degree")
rownames(lib.cons.education)=c("Conservative","Liberal")
KICKS=chisq.test(lib.cons.education)
KICKS
KICKS$stdres


polar.mod=lmer(polorized.dat$attitude~polorized.dat$vaccine*polorized.dat$polar.conservative + (1|polorized.dat$subject),data=polorized.dat)
summary(polar.mod)


r.squaredGLMM(polar.mod)



# Original Figure 1
# Splitting up group means to make a plot
hist(time2$pols)
time2$conservative=ifelse(time2$pols>=5,1,0)
time2$liberal=ifelse(time2$pols<=3,1,0)

lib.only=subset(time2,time2$liberal==1)
lib.flu=subset(lib.only,lib.only$vaccine=="flu")$attitude
lib.hpv=subset(lib.only,lib.only$vaccine=="hpv")$attitude
lib.mmr=subset(lib.only,lib.only$vaccine=="mmr")$attitude
lib.pox=subset(lib.only,lib.only$vaccine=="pox")$attitude
lib.covid=subset(lib.only,lib.only$vaccine=="covid")$attitude

con.only=subset(time2,time2$conservative==1)
con.flu=subset(con.only,con.only$vaccine=="flu")$attitude
con.hpv=subset(con.only,con.only$vaccine=="hpv")$attitude
con.mmr=subset(con.only,con.only$vaccine=="mmr")$attitude
con.pox=subset(con.only,con.only$vaccine=="pox")$attitude
con.covid=subset(con.only,con.only$vaccine=="covid")$attitude


Mns=c(mean(lib.flu),mean(con.flu),mean(lib.hpv),mean(con.hpv),mean(lib.mmr),mean(con.mmr),mean(lib.pox),mean(con.pox),
      mean(lib.covid),mean(con.covid))
SDs=c(sd(lib.flu),sd(con.flu),sd(lib.hpv),sd(con.hpv),sd(lib.mmr),sd(con.mmr),sd(lib.pox),sd(con.pox),
      sd(lib.covid),sd(con.covid))
Ns=c(length(lib.flu),length(con.flu),length(lib.hpv),length(con.hpv),length(lib.mmr),length(con.mmr),length(lib.pox),length(con.pox),
     length(lib.covid),length(con.covid))
SEs=c()
for (i in 1:length(Mns)){
  SEs=c(SEs,SDs[i]/sqrt(Ns[i]))
}

library(gplots)

png("change Figure 2.png", width = 600, height = 600)
barplot2(Mns,ylim=c(1,7), xpd=FALSE,
         ylab="Risk perceptions",
         plot.ci=T,ci.u=Mns+SEs,ci.l=Mns-SEs,
         col=rep(c("blue","red"),5))
dev.off()








# Figure 1 but with more levels to "liberal" and "conservatives"
hist(time2$pols,breaks=seq(1,7,by=1))
# 1 = Very Liberal
# 2 = Liberal
# 3 = Somewhat liberal
# 4 = Neither liberal nor conservative
# 5 = Somewhat conservative
# 6 = Conservative
# 7 = Very conservative
time2$very.liberal=ifelse(time2$pols==1,1,0)
time2$liberal=ifelse(time2$pols==2,1,0)
time2$somewhat.liberal=ifelse(time2$pols==3,1,0)
time2$neither=ifelse(time2$pols==4,1,0)
time2$somewhat.cons=ifelse(time2$pols==5,1,0)
time2$cons=ifelse(time2$pols==6,1,0)
time2$very.cons=ifelse(time2$pols==7,1,0)







# Write a function to automate this
draw.plot=function(df){
  VLIB=subset(df,df$very.liberal==1)
  LIB=subset(df,df$liberal==1)
  SLIB=subset(df,df$somewhat.liberal==1)
  NEITHER=subset(df,df$neither==1)
  SCONS=subset(df,df$somewhat.cons==1)
  CONS=subset(df,df$cons==1)
  VCONS=subset(df,df$very.cons==1)
  
  to.plot=c(
    mean(VLIB$attitude),
    mean(LIB$attitude),
    mean(SLIB$attitude),
    mean(NEITHER$attitude),
    mean(SCONS$attitude),
    mean(CONS$attitude),
    mean(VCONS$attitude)
  )
  
  SEs=c(
    sd(VLIB$attitude)/sqrt(length(VLIB$attitude)),
    sd(LIB$attitude)/sqrt(length(LIB$attitude)),
    sd(SLIB$attitude)/sqrt(length(SLIB$attitude)),
    sd(NEITHER$attitude)/sqrt(length(NEITHER$attitude)),
    sd(SCONS$attitude)/sqrt(length(SCONS$attitude)),
    sd(CONS$attitude)/sqrt(length(CONS$attitude)),
    sd(VCONS$attitude)/sqrt(length(VCONS$attitude))
  )
  
  bar.bar=barplot(to.plot,ylim=c(0,7),las=2,ylab="Negative attitude",
          names=c(
    "Very liberal",
    "Liberal",
    "Slightly liberal",
    "Neither...",
    "Slightly conservatives",
    "Conservative",
    "Very conservative"),
    col=c("dodgerblue4","dodgerblue3","dodgerblue2","gray",
          "red","firebrick2","firebrick4")
    )

  bar.bar
  arrows(bar.bar,to.plot+SEs,bar.bar,to.plot-SEs,
         code=3,angle=90,length=0.05)
  
}



par(mar=c(9.5,4,2,2))
draw.plot(subset(time2,time2$vaccine=="flu"))
draw.plot(subset(time2,time2$vaccine=="hpv"))
draw.plot(subset(time2,time2$vaccine=="mmr"))
draw.plot(subset(time2,time2$vaccine=="pox"))
draw.plot(subset(time2,time2$vaccine=="covid"))






############################# Plot "conservative" and "liberal" cluster attitudes 
############################# next to actual conservative and liberal post-pandemic attitudes
######### Pre pandemic
#### Flu
k2$centers # 1 = "likely conservative", 2 = "likely liberal"
pre.flu=data.frame(flu1,cluster.assignments)
time2$conservative=ifelse(time2$pols>=5,1,0)
time2.flu=subset(time2,time2$vaccine=="flu")

# FLU
pre.flu.lib=subset(pre.flu,pre.flu$cluster.assignments==2)$flu1 # pre-pandemic liberal's flu attitudes
pre.flu.cons=subset(pre.flu,pre.flu$cluster.assignments==1)$flu1 # pre-pandemic conservative flu attitudes
post.flu.lib=subset(time2.flu,time2.flu$conservative==0)$attitude # post-pandemic liberal flu attitudes
post.flu.cons=subset(time2.flu,time2.flu$conservative==1)$attitude # post-pandemic conservative flu attitudes

# HPV
pre.flu=data.frame(hpv1,cluster.assignments)
pre.hpv.lib=subset(pre.flu,pre.flu$cluster.assignments==2)$hpv1
pre.hpv.cons=subset(pre.flu,pre.flu$cluster.assignments==1)$hpv1
time2.flu=subset(time2,time2$vaccine=="hpv")
post.hpv.lib=subset(time2.flu,time2.flu$conservative==0)$attitude
post.hpv.cons=subset(time2.flu,time2.flu$conservative==1)$attitude

# MMR
pre.flu=data.frame(mmr1,cluster.assignments)
pre.mmr.lib=subset(pre.flu,pre.flu$cluster.assignments==2)$mmr1
pre.mmr.cons=subset(pre.flu,pre.flu$cluster.assignments==1)$mmr1
time2.flu=subset(time2,time2$vaccine=="mmr")
post.mmr.lib=subset(time2.flu,time2.flu$conservative==0)$attitude
post.mmr.cons=subset(time2.flu,time2.flu$conservative==1)$attitude

# CHICKENPOX
pre.flu=data.frame(pox1,cluster.assignments)
pre.pox.lib=subset(pre.flu,pre.flu$cluster.assignments==2)$pox1
pre.pox.cons=subset(pre.flu,pre.flu$cluster.assignments==1)$pox1
time2.flu=subset(time2,time2$vaccine=="pox")
post.pox.lib=subset(time2.flu,time2.flu$conservative==0)$attitude
post.pox.cons=subset(time2.flu,time2.flu$conservative==1)$attitude

# COVID
time2.flu=subset(time2,time2$vaccine=="covid")
post.covid.lib=subset(time2.flu,time2.flu$conservative==0)$attitude
post.covid.cons=subset(time2.flu,time2.flu$conservative==1)$attitude

vax.means=c(
  mean(pre.flu.lib),mean(pre.flu.cons),mean(post.flu.lib),mean(post.flu.cons),
  mean(pre.hpv.lib),mean(pre.hpv.cons),mean(post.hpv.lib),mean(post.hpv.cons),
  mean(pre.mmr.lib),mean(pre.mmr.cons),mean(post.mmr.lib),mean(post.mmr.cons),
  mean(pre.pox.lib),mean(pre.pox.cons),mean(post.pox.lib),mean(post.pox.cons),
  0,0,mean(post.covid.lib),mean(post.covid.cons)
)
SEs=c(
  sd(pre.flu.lib)/sqrt(length(pre.flu.lib)),
  sd(pre.flu.cons)/sqrt(length(pre.flu.cons)),
  sd(post.flu.lib)/sqrt(length(post.flu.lib)),
  sd(post.flu.cons)/sqrt(length(post.flu.cons)),
  
  sd(pre.hpv.lib)/sqrt(length(pre.hpv.lib)),
  sd(pre.hpv.cons)/sqrt(length(pre.hpv.cons)),
  sd(post.hpv.lib)/sqrt(length(post.hpv.lib)),
  sd(post.hpv.cons)/sqrt(length(post.hpv.cons)),
  
  sd(pre.mmr.lib)/sqrt(length(pre.mmr.lib)),
  sd(pre.mmr.cons)/sqrt(length(pre.mmr.cons)),
  sd(post.mmr.lib)/sqrt(length(post.mmr.lib)),
  sd(post.mmr.cons)/sqrt(length(post.mmr.cons)),
  
  sd(pre.pox.lib)/sqrt(length(pre.pox.lib)),
  sd(pre.pox.cons)/sqrt(length(pre.pox.cons)),
  sd(post.pox.lib)/sqrt(length(post.pox.lib)),
  sd(post.pox.cons)/sqrt(length(post.pox.cons)),
  
  0,0,
  sd(post.covid.lib)/sqrt(length(post.covid.lib)),
  sd(post.covid.cons)/sqrt(length(post.covid.cons))
)

bar.bar=barplot(vax.means,ylim=c(0,7),col=c("dodgerblue1","firebrick1","dodgerblue4","firebrick3"),ylab="Negative vaccine attitude")


arrows(bar.bar,vax.means+SEs,bar.bar,vax.means-SEs,
       code=3,angle=90,length=0.05)








# ANOVA on the pre- post- data
NVA=c(pre.flu.lib,pre.flu.cons, #
      post.flu.lib,post.flu.cons,#
      pre.hpv.lib,pre.hpv.cons, #
      post.hpv.lib,post.hpv.cons,#
      pre.mmr.lib,pre.mmr.cons, #
      post.mmr.lib,post.mmr.cons, #
      pre.pox.lib,pre.pox.cons, #
      post.pox.lib,post.pox.cons)

vaccines=c(
  rep("flu",length(pre.flu.lib)),
  rep("flu",length(pre.flu.cons)),
  rep("flu",length(post.flu.lib)),
  rep("flu",length(post.flu.cons)),
  rep("hpv",length(pre.hpv.lib)),
  rep("hpv",length(pre.hpv.cons)),
  rep("hpv",length(post.hpv.lib)),
  rep("hpv",length(post.hpv.cons)),
  rep("mmr",length(pre.mmr.lib)),
  rep("mmr",length(pre.mmr.cons)),
  rep("mmr",length(post.mmr.lib)),
  rep("mmr",length(post.mmr.cons)),
  rep("pox",length(pre.pox.lib)),
  rep("pox",length(pre.pox.cons)),
  rep("pox",length(post.pox.lib)),
  rep("pox",length(post.pox.cons))
)

time=c(rep("pre",length(pre.flu.lib)),
       rep("pre",length(pre.flu.cons)),
       rep("post",length(post.flu.lib)),
       rep("post",length(post.flu.cons)),
       rep("pre",length(pre.hpv.lib)),
       rep("pre",length(pre.hpv.cons)),
       rep("post",length(post.hpv.lib)),
       rep("post",length(post.hpv.cons)),
       rep("pre",length(pre.mmr.lib)),
       rep("pre",length(pre.mmr.cons)),
       rep("post",length(post.mmr.lib)),
       rep("post",length(post.mmr.cons)),
       rep("pre",length(pre.pox.lib)),
       rep("pre",length(pre.pox.cons)),
       rep("post",length(post.pox.lib)),
       rep("post",length(post.pox.cons))
       )

lib.con=c(rep("lib",length(pre.flu.lib)),
       rep("cons",length(pre.flu.cons)),
       rep("lib",length(post.flu.lib)),
       rep("cons",length(post.flu.cons)),
       rep("lib",length(pre.hpv.lib)),
       rep("cons",length(pre.hpv.cons)),
       rep("lib",length(post.hpv.lib)),
       rep("cons",length(post.hpv.cons)),
       rep("lib",length(pre.mmr.lib)),
       rep("cons",length(pre.mmr.cons)),
       rep("lib",length(post.mmr.lib)),
       rep("cons",length(post.mmr.cons)),
       rep("lib",length(pre.pox.lib)),
       rep("cons",length(pre.pox.cons)),
       rep("lib",length(post.pox.lib)),
       rep("cons",length(post.pox.cons))
)

subject=c()

for (i in vector) {
  
}


length(NVA)
length(time)
length(lib.con)

new.mod=aov(NVA~time*lib.con)
summary(new.mod)
TukeyHSD(new.mod,which="time:lib.con")



colnames(time2)

subject.time1=seq(1,length(flu1))

table(cluster.assignments)


# TIME 1
is.cons=ifelse(cluster.assignments==2,1,0)

time1=data.frame(cbind(rep(1,length(flu1)),subject.time1,flu1,is.cons,rep("flu",length(flu1))))
colnames(time1)=c("time","subject","NVA","is.conservative","vaccine")
# ADD HPV
add.hpv=data.frame(cbind(rep(1,length(hpv1)),subject.time1,hpv1,is.cons,rep("hpv",length(hpv1))))
colnames(add.hpv)=c("time","subject","NVA","is.conservative","vaccine")
time1=rbind(time1,add.hpv)
# ADD MMR
add.mmr=data.frame(cbind(rep(1,length(mmr1)),subject.time1,mmr1,is.cons,rep("mmr",length(mmr1))))
colnames(add.mmr)=c("time","subject","NVA","is.conservative","vaccine")
time1=rbind(time1,add.mmr)
# ADD POX
add.pox=data.frame(cbind(rep(1,length(pox1)),subject.time1,pox1,is.cons,rep("pox",length(pox1))))
colnames(add.pox)=c("time","subject","NVA","is.conservative","vaccine")
time1=rbind(time1,add.pox)


# TIME 2
subject.time2=seq(1,length(flu2))
is.cons=ifelse(lib.cons>=5,1,0)

time2=data.frame(cbind(rep(2,length(flu2)),subject.time2,flu2,is.cons,rep("flu",length(flu2))))
colnames(time2)=c("time","subject","NVA","is.conservative","vaccine")
# ADD HPV
add.hpv=data.frame(cbind(rep(2,length(hpv2)),subject.time2,hpv2,is.cons,rep("hpv",length(hpv2))))
colnames(add.hpv)=c("time","subject","NVA","is.conservative","vaccine")
time2=rbind(time2,add.hpv)
# ADD MMR
add.mmr=data.frame(cbind(rep(2,length(mmr2)),subject.time2,mmr2,is.cons,rep("mmr",length(mmr2))))
colnames(add.mmr)=c("time","subject","NVA","is.conservative","vaccine")
time2=rbind(time2,add.mmr)
# ADD POX
add.pox=data.frame(cbind(rep(2,length(pox2)),subject.time2,pox2,is.cons,rep("pox",length(pox2))))
colnames(add.pox)=c("time","subject","NVA","is.conservative","vaccine")
time2=rbind(time2,add.pox)

final.dat=rbind(time1,time2)
final.dat$NVA=as.numeric(final.dat$NVA)
final.dat$time=factor(final.dat$time)
final.dat$is.conservative=as.numeric(final.dat$is.conservative)

final.mod=lmer(NVA~vaccine+time*is.conservative+(1|subject),final.dat)
summary(final.mod)
r.squaredGLMM(final.mod)



